data("sleepstudy")
head(sleepstudy)
##   Reaction Days Subject
## 1 249.5600    0     308
## 2 258.7047    1     308
## 3 250.8006    2     308
## 4 321.4398    3     308
## 5 356.8519    4     308
## 6 414.6901    5     308
skimr::skim(sleepstudy)
Data summary
Name sleepstudy
Number of rows 180
Number of columns 3
_______________________
Column type frequency:
factor 1
numeric 2
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
Subject 0 1 FALSE 18 308: 10, 309: 10, 310: 10, 330: 10

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Reaction 0 1 298.51 56.33 194.33 255.38 288.65 336.75 466.35 ▃▇▆▂▁
Days 0 1 4.50 2.88 0.00 2.00 4.50 7.00 9.00 ▇▇▇▇▇
summary(sleepstudy)
##     Reaction          Days        Subject   
##  Min.   :194.3   Min.   :0.0   308    : 10  
##  1st Qu.:255.4   1st Qu.:2.0   309    : 10  
##  Median :288.7   Median :4.5   310    : 10  
##  Mean   :298.5   Mean   :4.5   330    : 10  
##  3rd Qu.:336.8   3rd Qu.:7.0   331    : 10  
##  Max.   :466.4   Max.   :9.0   332    : 10  
##                                (Other):120
ggplot(sleepstudy, aes(Days, Reaction)) + geom_point() + facet_wrap(~Subject) + geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

ggplot(sleepstudy, aes(Subject, Reaction)) + geom_boxplot()

testrandom <- plyr::ddply(sleepstudy, .(Subject), function(x) {
  m <- lm(Reaction ~ Days, data = x)
  data.frame(x, fitted = fitted(m)) 
})

ggplot(testrandom, aes(Days, fitted, group = Subject)) + geom_line()

Therefore, from the plots, the model needs the random intercept and random slope.

# random intercept and random slope
slmod1 <- lmer(Reaction ~ Days + (Days|Subject), data = sleepstudy)
# independent random effects
slmod2 <- lmer(Reaction ~ Days + (Days -1 |Subject) + (1|Subject), data = sleepstudy)
#summary(slmod1)
#summary(slmod2)

set.seed(987654321)
sl.mod2.sims  <- simulate(slmod2, nsim = 19)
sl.mod2.refit <- lapply(sl.mod2.sims, refit, object = slmod2)
sl.mod2.sim.ranef <- lapply(sl.mod2.refit, function(x) ranef(x)[[1]])
sl.mod2.sim.ranef <- do.call("rbind", sl.mod2.sim.ranef)
sl.mod2.sim.ranef$.n <- rownames(sl.mod2.sim.ranef)
sl.mod2.sim.ranef$.n <- as.numeric(str_extract(sl.mod2.sim.ranef$.n, "\\d+"))
true.sl.mod1.ranef <- ranef(slmod1)$Subject
sl_df <- nullabor:::add_true(sl.mod2.sim.ranef, true.sl.mod1.ranef, pos = 9)
ggplot(data = sl_df, aes(x = `(Intercept)`, y = Days)) + 
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE, alpha = 0.4) +
  facet_wrap( ~ .sample, ncol=5) + 
  xlab(NULL) + 
  ylab(NULL)
## `geom_smooth()` using formula 'y ~ x'

Since the true plot is indistinguishable from the null plots, there is no need for the correlation between the random effects.

Hence the model is shown in slmod2.

From the true model, replicating the data for 4 times by \(y \sim (X\beta, \Omega)\), where \(\Omega = Z\Gamma Z^\top + R\) and refitting the model with \(y = X\beta + Zb + e\) and get the plots & null plots.

Since \(M_i\) are the same for each case, the plot 7 and 8 are ignored!

\[\boldsymbol y \sim N(X\beta, Z \Gamma Z^\top + R)\]

extract.fitted.mod.v1 <- function(mod, data){
  N <- nrow(data)
  subjects <- unique(data$Subject)
  nsubjects <- length(subjects)

  y <- data$y
  X <- getME(mod, "X")
  beta <- fixef(mod)
  Z <- getME(mod, "Z")
  u <- c(ranef(mod)$Subject[,1], ranef(mod)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(mod))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 

  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(mod) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(mod) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G

  fitted_df <- data %>% 
    dplyr::mutate(
      fitted = as.vector(X %*% beta),
      resm = y - fitted,
      resc = as.vector(y - fitted - Z %*% u),
      index = 1:n()) %>% 
    mutate(resm_std = resm/sqrt(diag(varMargRes)[index]),
           resc_std = resc/sqrt(diag(varCondRes)[index])) %>% 
    ungroup()
  
  return(fitted_df)
}
extract.unit.mod.v1 <- function(mod, data){
  N <- nrow(data)
  subjects <- unique(data$Subject)
  nsubjects <- length(subjects)

  y <- data$y
  X <- getME(mod, "X")
  beta <- fixef(mod)
  Z <- getME(mod, "Z")
  u <- c(ranef(mod)$Subject[,1], ranef(mod)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(mod))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 

  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(mod) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(mod) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted <-  as.vector(X %*% beta)
  resm <-  y - fitted
  resc <-  as.vector(y - fitted - Z %*% u)

  unit_df <- expand_grid(subjects) %>% 
  dplyr::mutate(Vi = map_dbl(subjects, ~{
    ind <- data %>% 
      dplyr::mutate(index = 1:n()) %>% 
      filter(Subject == .x) %>% 
      pull(index)
    mi <- length(ind)
    Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
    #norm((diag(mi)- Ei %*% t(Ei)))
    v1 <- (diag(mi)- Ei %*% t(Ei))
    sqrt(sum(diag(v1 %*% t(v1)))) / mi
    #sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
  }),
  Mi = as.vector(t(u) %*% solve(varU) %*% u),
  index = 1:n())
  
  return(unit_df)
}
extract.lcr.mod.v1 <- function(mod, data){
  N <- nrow(data)
  subjects <- unique(data$Subject)
  nsubjects <- length(subjects)

  y <- data$y
  X <- getME(mod, "X")
  beta <- fixef(mod)
  Z <- getME(mod, "Z")
  u <- c(ranef(mod)$Subject[,1], ranef(mod)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(mod))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 

  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(mod) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(mod) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted <-  as.vector(X %*% beta)
  resm <-  y - fitted
  resc <-  as.vector(y - fitted - Z %*% u)
  index <- 1:nrow(data)
  resm_std <-  resm/sqrt(diag(varMargRes)[index])
  resc_std = resc/sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))

  resmcp_sl <- tibble::tibble(resmcp)
  
  return(resmcp_sl)
}
sim <- function(mod, data, nsim = 19, seed, std = FALSE){
  fit.sim <- simulate(mod, nsim = nsim, seed = seed)
  fit.refit <- lapply(fit.sim, refit, object = mod)
  fit.simy <- lapply(fit.refit, function(x) getME(x, 'y'))
  fit.simy.y <- do.call("cbind", fit.simy)
  fit.simy.y <- reshape2::melt(fit.simy.y)[-1]
  names(fit.simy.y) <- c(".n", "y")
  fit.simy.y$.n <- as.numeric(str_extract(fit.simy.y$.n, "\\d+"))
  fit.simy.y$Subject <- rep(data$Subject, 19)
  fit.simy.y$Days <- rep(data$Days, 19)
  return(fit.simy.y)
}

Version 1

Data plots from the ‘best’ model

repc <- sleepstudy
colnames(repc)[1] <- c("y")
v1.fitted <- extract.fitted.mod.v1(slmod2, repc)
v1.unit <- extract.unit.mod.v1(slmod2, repc)
v1.lcr <- extract.lcr.mod.v1(slmod2, repc)
ggplot(v1.fitted, aes(fitted, resm_std)) + geom_point() + geom_smooth(method = "loess")
## `geom_smooth()` using formula 'y ~ x'

ggplot(v1.fitted, aes(index, resm_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +theme(legend.position = "none")

# Dashed line: third quartile + 1.5 interquartile range
LSlesverb <- as.numeric(quantile(v1.unit$Vi,prob = 0.75) + 1.5*(quantile(v1.unit$Vi, prob = 0.75) - quantile(v1.unit$Vi,prob = 0.25)))
ggplot(v1.unit, aes(index, Vi)) + geom_point() + geom_hline(yintercept = LSlesverb, lty = 2) +
  geom_text(
    data = v1.unit %>% 
      filter(Vi>LSlesverb),
    aes(label = index),
    nudge_x = 0.35, nudge_y = 0.35,
    size = 2
  )

ggplot(v1.fitted, aes(index, resc_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) + theme(legend.position = "none") +
  geom_text(
    data = v1.fitted %>% 
      filter(abs(resc_std)>4),
    aes(label = index),
    nudge_x = 0.35, nudge_y = 0.35,
    check_overlap = T,
    size = 2
  )

ggplot(v1.fitted, aes(fitted, resc_std)) + geom_point() + 
  geom_text(
    data = v1.fitted %>% 
      filter(abs(resc_std)>4),
    aes(label = index),
    nudge_x = 0.35, nudge_y = 0.35,
    check_overlap = T,
    size = 2
  )

ggplot(v1.fitted, aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = 'red')

ggplot(v1.lcr, aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red")

  • There is no obvious trend in plot 1. It seems to be linearity of effects fexed (\(\boldsymbol{\mathbb{E}[y] = X\beta}\)).
  • There is no evidence showing the presence of outlying observations.
  • The indexs (1 and 6) indicates that those units’ covariance structure should be modified in Plot 3.
  • Plot 4 show that the the index for 8, 57 and 60 are the outlying observations in the reaction values.
    • Index 8 shows a significant drop at day 7 for subject 308
    • Index 57 shows an increase at day 6 for subject 332
    • Index 60 shows a drop at day 9 for subject 332
  • Although there are some outliers in the plot 5, there is no evidence show heteroscedasticity of conditional residuals.
  • Plot 6 shows the Gaussian QQ plot for the conditional residuals. However, from the plot, there are obvious tails. One extreme point on the upper end and two on the lower end may seem exceptional. The conditonal residuals might not follow a normal distribution.
  • Plot 7 shows the Gaussian QQ plot for the least confounded residuals. There also shows some tails.

1st replication

Simulation

v11.sim <- sim(slmod2, repc, seed = 1234)
v11.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v11.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v11_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v11.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))

  tibble::tibble(df[1:178,], resmcp)
})
v11.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v11.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})
Lineup plots
lineup(true = v1.fitted, samples = v11.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + 
  geom_point() + 
  geom_smooth(method = "loess") +
  facet_wrap( ~ .sample, ncol=5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v11.sim.fitted, pos = 7) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + 
  geom_point() + 
  geom_hline(yintercept = 0) +
  facet_wrap( ~ .sample, ncol=5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The true plot might be hard to be distinguished.
lineup(true = v1.unit, samples = v11.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

  • The true plot can be identified. Therefore the covariance matrix (\(\Omega_i\)) need to be modified.
lineup(true = v1.fitted, samples = v11.sim.fitted, pos = 7) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + 
  geom_point() + 
  geom_hline(yintercept = 0) +
  facet_wrap( ~ .sample, ncol=5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • By comparing with the marginal residual, the conditional residual seems to be better on detecting the plots.
  • The true plot might be identified since there are three outliers, two at the bottom and one at the top.
lineup(true = v1.fitted, samples = v11.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + 
  geom_point() + 
  geom_hline(yintercept = 0) +
  facet_wrap( ~ .sample, ncol=5)

lineup(true = v1.fitted, samples = v11.sim.fitted, pos = 9) %>% 
  ggplot(aes(sample = resc_std)) + 
  geom_qq() + geom_qq_line(color = "red") +
  facet_wrap( ~ .sample, ncol=5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The true plot should be identified. One extreme point on the upper end and two on the lower end may seem exceptional compared with the overall trend of conditional residual distribution. Hence, the conditional residual does not follow a normal distribution.
lineup(true = v1.lcr, samples = v11_simlcr, pos = 9) %>% 
  ggplot(aes(sample = resmcp)) + 
  geom_qq() + geom_qq_line(color = "red") +
  facet_wrap( ~ .sample, ncol=5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • By comparing with the conditional residual, the true plot for the least confounded residual may not able to be distinguished. Althogh it may not a normal distribution.

2nd replication

Simulation

v12.sim <- sim(slmod2, repc, seed = 2345)
v12.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v12.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v12_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v12.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v12.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v12.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})

Lineup plots

lineup(true = v1.fitted, samples = v12.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v12.sim.fitted, pos = 7) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + 
  geom_point() + 
  geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • I would expect that the viewers can see the value for the orange part are squeezed together at the bottom. And an outlier on the top for the green subject.
lineup(true = v1.unit, samples = v12.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

  • Two points at the top seem like outliers then the true plot can be identified. It shows that the covariance matrix of these two units need to modified.
lineup(true = v1.fitted, samples = v12.sim.fitted, pos = 7) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + 
  geom_point() + 
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 4, lty = 3) +
  geom_hline(yintercept = -4, lty = 3) +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • Two points on the bottom and one point at the top are exceptional. Then the true plot can be identified.
  • The true plot from conditional residual is easy to identified compared with the null plots of marginal residuals.
lineup(true = v1.fitted, samples = v12.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5)

  • Aim is to see if there is a pattern in the plot which conditional residual v.s fitted value. If shown, then it shows the heteroscedasticity of conditional residuals.
lineup(true = v1.fitted, samples = v12.sim.fitted, pos = 9) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • Two points on the lower end and one plot on the upper end seem exceptional compared to overall conditional residual distribution. Hence, for the true plot, the distribution is not normal.
lineup(true = v1.lcr, samples = v12_simlcr, pos = 9) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

For the least confounded residual, it is hard to distinguish the true plot among the null plots.

3rd replication

Simulation

v13.sim <- sim(slmod2, repc, seed = 3456)
v13.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v13.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v13_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v13.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v13.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v13.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})

Lineup plots

lineup(true = v1.fitted, samples = v13.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + 
  geom_point(alpha = 0.8) + 
  geom_hline(yintercept = 0) +
  geom_smooth(method='loess') +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v13.sim.fitted, pos = 13) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v13.sim.unit, pos = 8) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

For this plot, the true plot might be detected. Then the inadequate measure of covariance structure is identified.

lineup(true = v1.fitted, samples = v13.sim.fitted, pos = 13) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

The conditional residual plot is more easy compared with marginal residual to detect the true plot.

True plot is identified, there is evidence showing the outlying observation.

lineup(true = v1.fitted, samples = v13.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5)

  • There are some patterns showing in the null plots.
lineup(true = v1.fitted, samples = v13.sim.fitted, pos = 4) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • Identified!
lineup(true = v1.lcr, samples = v13_simlcr, pos = 4) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

Same here, the conditional residual QQ-plot is easy to identify. However, the least confounded residual is hard to be distinguished.

4th replication

Simulation

v14.sim <- sim(slmod2, repc, seed = 9876)
v14.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v14.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v14_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v14.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v14.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v14.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})

Lineup plots

lineup(true = v1.fitted, samples = v14.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_smooth(method = 'loess') +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

  • The true plot might hard to be identified.
lineup(true = v1.fitted, samples = v14.sim.fitted, pos = 13) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The lineup maybe hard to find the true plot
lineup(true = v1.unit, samples = v14.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v14.sim.fitted, pos = 13) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The true plot is easily identified. It shows that the outlying observation of \(y\).
lineup(true = v1.fitted, samples = v14.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5)

  • Some of the null plots may show some patterns.
lineup(true = v1.fitted, samples = v14.sim.fitted, pos = 4) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • Identified!

The true plot is identifiable, therefore, the conditional residual does not follow a Normal distribution.

lineup(true = v1.lcr, samples = v14_simlcr, pos = 4) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • It is hard to distinguish..

Version 2

We sampled the day (from 0 to 9) as the index. We added 50 to the reaction value for each subject on the same day which is sampled from.

repc2 <- sleepstudy

set.seed(1234)
samp <- sample(1:10, 1)
n <- length(unique(repc2$Subject))

colnames(repc2)[1] <- "y"

for (i in 1:n){
  repc2[(i*samp),]$y <- repc2[(i*samp),]$y + 50
}

repc2
##            y Days Subject
## 1   249.5600    0     308
## 2   258.7047    1     308
## 3   250.8006    2     308
## 4   321.4398    3     308
## 5   356.8519    4     308
## 6   414.6901    5     308
## 7   382.2038    6     308
## 8   290.1486    7     308
## 9   430.5853    8     308
## 10  516.3535    9     308
## 11  222.7339    0     309
## 12  205.2658    1     309
## 13  202.9778    2     309
## 14  204.7070    3     309
## 15  207.7161    4     309
## 16  215.9618    5     309
## 17  213.6303    6     309
## 18  217.7272    7     309
## 19  224.2957    8     309
## 20  287.3142    9     309
## 21  199.0539    0     310
## 22  194.3322    1     310
## 23  234.3200    2     310
## 24  232.8416    3     310
## 25  229.3074    4     310
## 26  220.4579    5     310
## 27  235.4208    6     310
## 28  255.7511    7     310
## 29  261.0125    8     310
## 30  297.5153    9     310
## 31  321.5426    0     330
## 32  300.4002    1     330
## 33  283.8565    2     330
## 34  285.1330    3     330
## 35  285.7973    4     330
## 36  297.5855    5     330
## 37  280.2396    6     330
## 38  318.2613    7     330
## 39  305.3495    8     330
## 40  404.0487    9     330
## 41  287.6079    0     331
## 42  285.0000    1     331
## 43  301.8206    2     331
## 44  320.1153    3     331
## 45  316.2773    4     331
## 46  293.3187    5     331
## 47  290.0750    6     331
## 48  334.8177    7     331
## 49  293.7469    8     331
## 50  421.5811    9     331
## 51  234.8606    0     332
## 52  242.8118    1     332
## 53  272.9613    2     332
## 54  309.7688    3     332
## 55  317.4629    4     332
## 56  309.9976    5     332
## 57  454.1619    6     332
## 58  346.8311    7     332
## 59  330.3003    8     332
## 60  303.8644    9     332
## 61  283.8424    0     333
## 62  289.5550    1     333
## 63  276.7693    2     333
## 64  299.8097    3     333
## 65  297.1710    4     333
## 66  338.1665    5     333
## 67  332.0265    6     333
## 68  348.8399    7     333
## 69  333.3600    8     333
## 70  412.0428    9     333
## 71  265.4731    0     334
## 72  276.2012    1     334
## 73  243.3647    2     334
## 74  254.6723    3     334
## 75  279.0244    4     334
## 76  284.1912    5     334
## 77  305.5248    6     334
## 78  331.5229    7     334
## 79  335.7469    8     334
## 80  427.2990    9     334
## 81  241.6083    0     335
## 82  273.9472    1     335
## 83  254.4907    2     335
## 84  270.8021    3     335
## 85  251.4519    4     335
## 86  254.6362    5     335
## 87  245.4523    6     335
## 88  235.3110    7     335
## 89  235.7541    8     335
## 90  287.2466    9     335
## 91  312.3666    0     337
## 92  313.8058    1     337
## 93  291.6112    2     337
## 94  346.1222    3     337
## 95  365.7324    4     337
## 96  391.8385    5     337
## 97  404.2601    6     337
## 98  416.6923    7     337
## 99  455.8643    8     337
## 100 508.9167    9     337
## 101 236.1032    0     349
## 102 230.3167    1     349
## 103 238.9256    2     349
## 104 254.9220    3     349
## 105 250.7103    4     349
## 106 269.7744    5     349
## 107 281.5648    6     349
## 108 308.1020    7     349
## 109 336.2806    8     349
## 110 401.6451    9     349
## 111 256.2968    0     350
## 112 243.4543    1     350
## 113 256.2046    2     350
## 114 255.5271    3     350
## 115 268.9165    4     350
## 116 329.7247    5     350
## 117 379.4445    6     350
## 118 362.9184    7     350
## 119 394.4872    8     350
## 120 439.0527    9     350
## 121 250.5265    0     351
## 122 300.0576    1     351
## 123 269.8939    2     351
## 124 280.5891    3     351
## 125 271.8274    4     351
## 126 304.6336    5     351
## 127 287.7466    6     351
## 128 266.5955    7     351
## 129 321.5418    8     351
## 130 397.5655    9     351
## 131 221.6771    0     352
## 132 298.1939    1     352
## 133 326.8785    2     352
## 134 346.8555    3     352
## 135 348.7402    4     352
## 136 352.8287    5     352
## 137 354.4266    6     352
## 138 360.4326    7     352
## 139 375.6406    8     352
## 140 438.5417    9     352
## 141 271.9235    0     369
## 142 268.4369    1     369
## 143 257.2424    2     369
## 144 277.6566    3     369
## 145 314.8222    4     369
## 146 317.2135    5     369
## 147 298.1353    6     369
## 148 348.1229    7     369
## 149 340.2800    8     369
## 150 416.5131    9     369
## 151 225.2640    0     370
## 152 234.5235    1     370
## 153 238.9008    2     370
## 154 240.4730    3     370
## 155 267.5373    4     370
## 156 344.1937    5     370
## 157 281.1481    6     370
## 158 347.5855    7     370
## 159 365.1630    8     370
## 160 422.2288    9     370
## 161 269.8804    0     371
## 162 272.4428    1     371
## 163 277.8989    2     371
## 164 281.7895    3     371
## 165 279.1705    4     371
## 166 284.5120    5     371
## 167 259.2658    6     371
## 168 304.6306    7     371
## 169 350.7807    8     371
## 170 419.4692    9     371
## 171 269.4117    0     372
## 172 273.4740    1     372
## 173 297.5968    2     372
## 174 310.6316    3     372
## 175 287.1726    4     372
## 176 329.6076    5     372
## 177 334.4818    6     372
## 178 343.2199    7     372
## 179 369.1417    8     372
## 180 414.1236    9     372
slmod3 <- lmer(y ~ Days + (Days - 1|Subject) + (1|Subject), data = repc2)

v2.fitted <- extract.fitted.mod.v1(slmod3, repc2)
v2.unit <- extract.unit.mod.v1(slmod3, repc2)
v2.lcr <- extract.lcr.mod.v1(slmod3, repc2)
ggplot(v2.fitted, aes(fitted, resm_std)) + geom_point() + geom_smooth(method = 'loess')
## `geom_smooth()` using formula 'y ~ x'

ggplot(v2.fitted, aes(index, resm_std, color = (index %in% c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180)))) + geom_point() + theme(legend.position = "none")

# Dashed line: third quartile + 1.5 interquartile range
LSlesverb <- as.numeric(quantile(v2.unit$Vi,prob = 0.75) + 1.5*(quantile(v2.unit$Vi, prob = 0.75) - quantile(v2.unit$Vi,prob = 0.25)))
ggplot(v2.unit, aes(index, Vi)) + geom_point() + geom_hline(yintercept = LSlesverb, lty = 2) + geom_text(
  data = v2.unit %>% 
    filter(Vi>LSlesverb),
  aes(label = index),
  nudge_x = 0.05, nudge_y = 0.05,
  size = 2
)

ggplot(v2.fitted, aes(index, resc_std, color = (index %in% c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180)))) + geom_point() + theme(legend.position = "none")

ggplot(v2.fitted, aes(fitted, resc_std)) + geom_point()

ggplot(v2.fitted, aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red")

ggplot(v2.lcr, aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red")

By adding the noise,

1st replication

Simulation

v21.sim <- sim(slmod3, repc2, seed = 9876)
v21.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v21.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v21_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v21.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v21.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v21.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})

Lineup plots

lineup(true = v1.fitted, samples = v21.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + 
  geom_smooth(method = "loess") +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v21.sim.fitted, pos = 5) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + 
  geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v21.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v21.sim.fitted, pos = 5) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + 
  geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

Both marginal residual and conditional residual are hard to detect.

lineup(true = v1.fitted, samples = v21.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

Homoscedasticity in the case!

lineup(true = v1.fitted, samples = v21.sim.fitted, pos = 8) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The true plot should be identified.
lineup(true = v1.lcr, samples = v21_simlcr, pos = 8) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • We hope the viewers can see that one point on the lower end and one point on the upper end are not expected in the true plot. Hence, the least confounded residual does not follow a normal distribution.

2nd replication

Simulation

v22.sim <- sim(slmod3, repc2, seed = 8765)
v22.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v22.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v22_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v22.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v22.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v22.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})

Lineup plots

lineup(true = v1.fitted, samples = v22.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_smooth(method = 'loess') +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v22.sim.fitted, pos = 5) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v22.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v22.sim.fitted, pos = 5) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The conditional residual should be more easily to be identified.
lineup(true = v1.fitted, samples = v22.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v22.sim.fitted, pos = 8) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • true plot should be identified.
lineup(true = v1.lcr, samples = v22_simlcr, pos = 8) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) + xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • For checking the distribution of least confounded residual, the true plot may not easy to be distinguished among the 20 plots. Then we may fail to reject the null hypothesis thatt the least confounded residual follows a Gaussian distribution.

3rd replication

Simulation

v23.sim <- sim(slmod3, repc2, seed = 4387)
v23.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v23.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v23_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v23.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v23.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v23.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})
Lineup plots
lineup(true = v1.fitted, samples = v23.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_smooth(method = "loess") +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v23.sim.fitted, pos = 15) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v23.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v23.sim.fitted, pos = 15) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5) + theme(legend.position = "none")

  • We wish the viewers to the outlying observations in the true plot since the three points are the outliers compared with others.
lineup(true = v1.fitted, samples = v23.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v23.sim.fitted, pos = 20) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) + xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The true plot should be identified.
lineup(true = v1.lcr, samples = v23_simlcr, pos = 20) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • From these two plots, the conditional residual is easy to identified but the least confounded residual is hard to distinguish.

4th replication

Simulation

v24.sim <- sim(slmod3, repc2, seed = 8696)
v24.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v24.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v24_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v24.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v24.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v24.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})
Lineup plots
lineup(true = v1.fitted, samples = v24.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_smooth(method = "loess") +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v24.sim.fitted, pos = 15) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v24.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v24.sim.fitted, pos = 15) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

The true plot is identified.

lineup(true = v1.fitted, samples = v24.sim.fitted, pos = 10) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v24.sim.fitted, pos = 20) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The true plot is identified.
lineup(true = v1.lcr, samples = v24_simlcr, pos = 20) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • True plot may not be identified.

Version 3

By sampling one value from 1 to 180 as the index of the data, I added 200 since the value is close to mean (298.5078917) to that specific reaction value as an obvious outlier.

set.seed(20201010)
samp <- sample(1:180, 2)
repc3 <- sleepstudy
repc3[samp,]$Reaction <- repc3[samp,]$Reaction + 150 #summary(sleepstudy$Reaction)[4]/2
colnames(repc3)[1] <- c("y")
slmod4 <- lmer(y ~ Days + (Days - 1|Subject) + (1|Subject), data = repc3)

v3.fitted <- extract.fitted.mod.v1(slmod4, repc3)
v3.lcr <- extract.lcr.mod.v1(slmod4, repc3)
v3.unit <- extract.unit.mod.v1(slmod4, repc3)
ggplot(v3.fitted, aes(fitted, resm_std)) + geom_point() + geom_smooth(method = "loess")
## `geom_smooth()` using formula 'y ~ x'

ggplot(v3.fitted, aes(index, resm_std, color = (index %in% samp))) + geom_point() + theme(legend.position = "none")

ggplot(v3.fitted, aes(index, resc_std, color = (index %in% samp))) + geom_point() + theme(legend.position = "none") + geom_text(data = v3.fitted %>% filter(abs(resc_std)>4), aes(label = index), nudge_x = 0.35, nudge_y = 0.35, size = 2)

ggplot(v3.fitted,aes(fitted, resc_std)) + geom_point() 

ggplot(v3.unit, aes(index, Vi)) + geom_point()

ggplot(v3.fitted, aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red")

ggplot(v3.lcr, aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red")

a <- cbind(v3.fitted[1:178,], v3.lcr)
b <- sort(a$resc_std)
c <- sort(a$resmcp)

plot(b,c)
abline(0,1)

1st replication

Simulation

v31.sim <- sim(slmod4, repc3, seed = 2342)
v31.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v31.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00345139 (tol = 0.002, component 1)
v31_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v31.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00345139 (tol = 0.002, component 1)
v31.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v31.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00345139 (tol = 0.002, component 1)

Lineup plots

lineup(true = v1.fitted, samples = v31.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_smooth(method = 'loess') +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v31.sim.fitted, pos = 15) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v31.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

True plot might be seen (panel 9)

lineup(true = v1.fitted, samples = v31.sim.fitted, pos = 15) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The true plot should be identified.
lineup(true = v1.fitted, samples = v31.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v31.sim.fitted, pos = 11) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.lcr, samples = v31_simlcr, pos = 11) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The conditional residual is easier to detect that if it follows a normal distribution. While the least confounded residual may be hard to identify.

2nd replication

Simulation

v32.sim <- sim(slmod4, repc3, seed = 2353)
v32.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v32.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v32_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v32.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v32.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v32.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})
Lineup plots
lineup(true = v1.fitted, samples = v32.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_smooth(method = 'loess') +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v32.sim.fitted, pos = 15) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v32.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v32.sim.fitted, pos = 15) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.fitted, samples = v32.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v32.sim.fitted, pos = 11) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • True plot is identified, then the conditonal residuals does not follow a normal distribution!
lineup(true = v1.lcr, samples = v32_simlcr, pos = 11) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

3rd replication

Simulation

v33.sim <- sim(slmod4, repc3, seed = 4352)
v33.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v33.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v33_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v33.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v33.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v33.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})
Lineup plots
lineup(true = v1.fitted, samples = v33.sim.fitted, pos = 10) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_smooth(method = "loess") +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v33.sim.fitted, pos = 2) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + 
  geom_point() + 
  geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v33.sim.unit, pos = 11) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v33.sim.fitted, pos = 2) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

True plot is identified.

lineup(true = v1.fitted, samples = v33.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v33.sim.fitted, pos = 2) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

true plot is identified, the conditional residual does not follow a normal distribution.

lineup(true = v1.lcr, samples = v33_simlcr, pos = 2) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

  • The least conditional residual maybe not easy to be identified.

4th replication

Simulation

v34.sim <- sim(slmod4, repc3, seed = 1234)
v34.sim.fitted <- purrr::map_dfr(1:19, function(i){
  df <- v34.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})
v34_simlcr <- purrr::map_dfr(1:19, function(i){
  df <- v34.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  R_half <- expm::sqrtm(R)

  auxqn <- eigen((R_half %*% P %*% R_half), symmetric = T, only.values = FALSE)

  p <- length(beta)

  lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(expm::sqrtm(R[1:N,1:N]))

  var.resmcp <- lt %*% varCondRes[1:N,1:N] %*% t(lt)

  resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
  .n = i

  tibble::tibble(.n, resmcp)
})
v34.sim.unit <- purrr::map_dfr(1:19, function(i){
  df <- v34.sim %>% filter(.n == i)
  m <- lmer(y ~ Days + (Days -1 |Subject) + (1|Subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$Subject)
  nsubjects <- length(subjects)

  y <- df$y
  X <- model.matrix(~ Days, data = df)
  beta <- fixef(m)
  Z <- getME(m, "Z")
  u <- c(ranef(m)$Subject[,1], ranef(m)$Subject[,2])
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G1 <- vc$vcov[vc$grp == 'Subject'] * diag(nsubjects)
  G2 <- vc$vcov[vc$grp == 'Subject.1'] * diag(nsubjects)
  G <- Matrix::bdiag(G1, G2)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - as.vector(Z %*% u)
  index = 1:nrow(df)
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  expand_grid(subjects) %>% 
    dplyr::mutate(Vi = map_dbl(subjects, ~{
      ind <- df %>% 
        mutate(index = 1:n()) %>% 
        filter(Subject == .x) %>% 
        pull(index)
      mi <- length(ind)
      Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% resm[ind]
      sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
      }),
      Mi = as.vector(t(u) %*% solve(varU) %*% u),
      index = 1:n(),
      .n = i)
})

Lineup plots

lineup(true = v1.fitted, samples = v34.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resm_std)) + geom_point() + geom_smooth(method = "loess") +
  facet_wrap(~.sample, nrow = 5)
## `geom_smooth()` using formula 'y ~ x'

lineup(true = v1.fitted, samples = v34.sim.fitted, pos = 2) %>% 
  ggplot(aes(index, resm_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.unit, samples = v34.sim.unit, pos = 9) %>% 
  ggplot(aes(index, Vi)) + geom_point() +
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v34.sim.fitted, pos = 2) %>% 
  ggplot(aes(index, resc_std, color = Subject)) + geom_point() + geom_hline(yintercept = 0) +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.fitted, samples = v34.sim.fitted, pos = 9) %>% 
  ggplot(aes(fitted, resc_std)) + geom_point() + 
  facet_wrap(~.sample, nrow = 5)

lineup(true = v1.fitted, samples = v34.sim.fitted, pos = 6) %>% 
  ggplot(aes(sample = resc_std)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) +
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")

lineup(true = v1.lcr, samples = v34_simlcr, pos = 6) %>% 
  ggplot(aes(sample = resmcp)) + geom_qq() + geom_qq_line(color = "red") +
  facet_wrap(~.sample, nrow = 5) + 
  xlab(NULL) + ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), 
        legend.position="none")